When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Load the communities data set from disk; the column-specification message
# printed below reports 1994 rows and 128 columns.
communities <- read_csv("data/communities.data")
## Rows: 1994 Columns: 128
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): communityname
## dbl (127): state, county, community, fold, population, householdsize, racepc...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Rename every column, in file order, to the CamelCase names used in the rest
# of the analysis.
names(communities) <- c(
  # Identifiers
  "State", "County", "Community", "CommunityName", "Fold",
  # Population, race and age
  "Population", "HouseholdSize", "RacePctBlack", "RacePctWhite",
  "RacePctAsian", "RacePctHisp", "AgePct12t21", "AgePct12t29", "AgePct16t24",
  "AgePct65up", "NumbUrban", "PctUrban",
  # Income and poverty
  "MedIncome", "PctWWage", "PctWFarmSelf", "PctWInvInc", "PctWSocSec",
  "PctWPubAsst", "PctWRetire", "MedFamInc", "PerCapInc", "WhitePerCap",
  "BlackPerCap", "IndianPerCap", "AsianPerCap", "OtherPerCap", "HispPerCap",
  "NumUnderPov", "PctPopUnderPov",
  # Education and employment
  "PctLess9thGrade", "PctNotHSGrad", "PctBSorMore", "PctUnemployed",
  "PctEmploy", "PctEmplManu", "PctEmplProfServ", "PctOccupManu",
  "PctOccupMgmtProf",
  # Family structure
  "MalePctDivorce", "MalePctNevMarr", "FemalePctDiv", "TotalPctDiv",
  "PersPerFam", "PctFam2Par", "PctKids2Par", "PctYoungKids2Par",
  "PctTeen2Par", "PctWorkMomYoungKids", "PctWorkMom", "NumIlleg", "PctIlleg",
  # Immigration and language
  "NumImmig", "PctImmigRecent", "PctImmigRec5", "PctImmigRec8",
  "PctImmigRec10", "PctRecentImmig", "PctRecImmig5", "PctRecImmig8",
  "PctRecImmig10", "PctSpeakEnglOnly", "PctNotSpeakEnglWell",
  # Housing
  "PctLargHouseFam", "PctLargHouseOccup", "PersPerOccupHous",
  "PersPerOwnOccHous", "PersPerRentOccHous", "PctPersOwnOccup",
  "PctPersDenseHous", "PctHousLess3BR", "MedNumBR", "HousVacant",
  "PctHousOccup", "PctHousOwnOcc", "PctVacantBoarded", "PctVacMore6Mos",
  "MedYrHousBuilt", "PctHousNoPhone", "PctWOFullPlumb", "OwnOccLowQuart",
  "OwnOccMedVal", "OwnOccHiQuart", "RentLowQ", "RentMedian", "RentHighQ",
  "MedRent", "MedRentPctHousInc", "MedOwnCostPctInc", "MedOwnCostPctIncNoMtg",
  "NumInShelters", "NumStreet",
  # Residency
  "PctForeignBorn", "PctBornSameState", "PctSameHouse85", "PctSameCity85",
  "PctSameState85",
  # Policing
  "LemasSwornFT", "LemasSwFTPerPop", "LemasSwFTFieldOps",
  "LemasSwFTFieldPerPop", "LemasTotalReq", "LemasTotReqPerPop",
  "PolicRepPerOffic", "PolicPerPop", "RacialMatchCommPol", "PctPolicWhite",
  "PctPolicBlack", "PctPolicHisp", "PctPolicAsian", "PctPolicMinor",
  "OfficAssgnDrugUnits", "NumKindsDrugSeiz", "PolicAveOTWorked",
  # Geography and transport
  "LandArea", "PopDens", "PctUsePubTrans",
  # Police resources and the outcome variable
  "PolicCars", "PolicOperBudg", "LemasPctPolicOnPatr", "LemasGangUnitDeploy",
  "LemasPctOfficDrugUn", "PolicBudgPerPop", "ViolentCrimesPerPop"
)
# Coerce the identifier columns to integer. `transform()` only assigns columns
# that are passed as named arguments (`name = value`); a comparison written
# with `==` is evaluated as an unnamed argument and leaves the data untouched.
communities <- transform(
  communities,
  State = as.integer(State),
  County = as.integer(County),
  Community = as.integer(Community)
)
# Label each row with a region based on its State code; any code that is not
# in one of the four lists becomes "Territory". The result is stored as a
# factor.
# NOTE(review): in the head() output below, State 8 (Lakewoodcity) and
# State 24 (Aberdeentown) are labelled "Northeast" - confirm these code lists
# against the State coding scheme used by the data.
communities$Region <- with(
  communities,
  as.factor(
    ifelse(
      State %in% c(8, 22, 24, 32, 34, 35, 42, 44, 51),
      "Northeast",
      ifelse(
        State %in% c(16, 17, 18, 19, 25, 26, 28, 30, 37, 39, 46, 55),
        "Midwest",
        ifelse(
          State %in% c(1, 5, 9, 10, 11, 12, 20, 21, 23, 27, 36, 40, 45, 47,
                       48, 52, 54),
          "South",
          ifelse(
            State %in% c(2, 4, 6, 7, 14, 15, 29, 31, 33, 41, 50, 53, 56),
            "West",
            "Territory"
          )
        )
      )
    )
  )
)

# Move Region so it sits directly before Fold, re-apply the factor conversion
# through transform(), then preview the first rows.
communities <- communities |>
  relocate(Region, .before = "Fold") |>
  transform(Region = as.factor(Region))
head(communities)
## State County Community CommunityName Region Fold Population
## 1 8 NA NA Lakewoodcity Northeast 1 0.19
## 2 53 NA NA Tukwilacity West 1 0.00
## 3 24 NA NA Aberdeentown Northeast 1 0.00
## 4 34 5 81440 Willingborotownship Northeast 1 0.04
## 5 42 95 6096 Bethlehemtownship Northeast 1 0.01
## 6 6 NA NA SouthPasadenacity West 1 0.02
## HouseholdSize RacePctBlack RacePctWhite RacePctAsian RacePctHisp AgePct12t21
## 1 0.33 0.02 0.90 0.12 0.17 0.34
## 2 0.16 0.12 0.74 0.45 0.07 0.26
## 3 0.42 0.49 0.56 0.17 0.04 0.39
## 4 0.77 1.00 0.08 0.12 0.10 0.51
## 5 0.55 0.02 0.95 0.09 0.05 0.38
## 6 0.28 0.06 0.54 1.00 0.25 0.31
## AgePct12t29 AgePct16t24 AgePct65up NumbUrban PctUrban MedIncome PctWWage
## 1 0.47 0.29 0.32 0.20 1.0 0.37 0.72
## 2 0.59 0.35 0.27 0.02 1.0 0.31 0.72
## 3 0.47 0.28 0.32 0.00 0.0 0.30 0.58
## 4 0.50 0.34 0.21 0.06 1.0 0.58 0.89
## 5 0.38 0.23 0.36 0.02 0.9 0.50 0.72
## 6 0.48 0.27 0.37 0.04 1.0 0.52 0.68
## PctWFarmSelf PctWInvInc PctWSocSec PctWPubAsst PctWRetire MedFamInc PerCapInc
## 1 0.34 0.60 0.29 0.15 0.43 0.39 0.40
## 2 0.11 0.45 0.25 0.29 0.39 0.29 0.37
## 3 0.19 0.39 0.38 0.40 0.84 0.28 0.27
## 4 0.21 0.43 0.36 0.20 0.82 0.51 0.36
## 5 0.16 0.68 0.44 0.11 0.71 0.46 0.43
## 6 0.20 0.61 0.28 0.15 0.25 0.62 0.72
## WhitePerCap BlackPerCap IndianPerCap AsianPerCap OtherPerCap HispPerCap
## 1 0.39 0.32 0.27 0.27 0.36 0.41
## 2 0.38 0.33 0.16 0.30 0.22 0.35
## 3 0.29 0.27 0.07 0.29 0.28 0.39
## 4 0.40 0.39 0.16 0.25 0.36 0.44
## 5 0.41 0.28 0.00 0.74 0.51 0.48
## 6 0.76 0.77 0.28 0.52 0.48 0.60
## NumUnderPov PctPopUnderPov PctLess9thGrade PctNotHSGrad PctBSorMore
## 1 0.08 0.19 0.10 0.18 0.48
## 2 0.01 0.24 0.14 0.24 0.30
## 3 0.01 0.27 0.27 0.43 0.19
## 4 0.01 0.10 0.09 0.25 0.31
## 5 0.00 0.06 0.25 0.30 0.33
## 6 0.01 0.12 0.13 0.12 0.80
## PctUnemployed PctEmploy PctEmplManu PctEmplProfServ PctOccupManu
## 1 0.27 0.68 0.23 0.41 0.25
## 2 0.27 0.73 0.57 0.15 0.42
## 3 0.36 0.58 0.32 0.29 0.49
## 4 0.33 0.71 0.36 0.45 0.37
## 5 0.12 0.65 0.67 0.38 0.42
## 6 0.10 0.65 0.19 0.77 0.06
## PctOccupMgmtProf MalePctDivorce MalePctNevMarr FemalePctDiv TotalPctDiv
## 1 0.52 0.68 0.40 0.75 0.75
## 2 0.36 1.00 0.63 0.91 1.00
## 3 0.32 0.63 0.41 0.71 0.70
## 4 0.39 0.34 0.45 0.49 0.44
## 5 0.46 0.22 0.27 0.20 0.21
## 6 0.91 0.49 0.57 0.61 0.58
## PersPerFam PctFam2Par PctKids2Par PctYoungKids2Par PctTeen2Par
## 1 0.35 0.55 0.59 0.61 0.56
## 2 0.29 0.43 0.47 0.60 0.39
## 3 0.45 0.42 0.44 0.43 0.43
## 4 0.75 0.65 0.54 0.83 0.65
## 5 0.51 0.91 0.91 0.89 0.85
## 6 0.44 0.62 0.69 0.87 0.53
## PctWorkMomYoungKids PctWorkMom NumIlleg PctIlleg NumImmig PctImmigRecent
## 1 0.74 0.76 0.04 0.14 0.03 0.24
## 2 0.46 0.53 0.00 0.24 0.01 0.52
## 3 0.71 0.67 0.01 0.46 0.00 0.07
## 4 0.85 0.86 0.03 0.33 0.02 0.11
## 5 0.40 0.60 0.00 0.06 0.00 0.03
## 6 0.30 0.43 0.00 0.11 0.04 0.30
## PctImmigRec5 PctImmigRec8 PctImmigRec10 PctRecentImmig PctRecImmig5
## 1 0.27 0.37 0.39 0.07 0.07
## 2 0.62 0.64 0.63 0.25 0.27
## 3 0.06 0.15 0.19 0.02 0.02
## 4 0.20 0.30 0.31 0.05 0.08
## 5 0.07 0.20 0.27 0.01 0.02
## 6 0.35 0.43 0.47 0.50 0.50
## PctRecImmig8 PctRecImmig10 PctSpeakEnglOnly PctNotSpeakEnglWell
## 1 0.08 0.08 0.89 0.06
## 2 0.25 0.23 0.84 0.10
## 3 0.04 0.05 0.88 0.04
## 4 0.11 0.11 0.81 0.08
## 5 0.04 0.05 0.88 0.05
## 6 0.56 0.57 0.45 0.28
## PctLargHouseFam PctLargHouseOccup PersPerOccupHous PersPerOwnOccHous
## 1 0.14 0.13 0.33 0.39
## 2 0.16 0.10 0.17 0.29
## 3 0.20 0.20 0.46 0.52
## 4 0.56 0.62 0.85 0.77
## 5 0.16 0.19 0.59 0.60
## 6 0.25 0.19 0.29 0.53
## PersPerRentOccHous PctPersOwnOccup PctPersDenseHous PctHousLess3BR MedNumBR
## 1 0.28 0.55 0.09 0.51 0.5
## 2 0.17 0.26 0.20 0.82 0.0
## 3 0.43 0.42 0.15 0.51 0.5
## 4 1.00 0.94 0.12 0.01 0.5
## 5 0.37 0.89 0.02 0.19 0.5
## 6 0.18 0.39 0.26 0.73 0.0
## HousVacant PctHousOccup PctHousOwnOcc PctVacantBoarded PctVacMore6Mos
## 1 0.21 0.71 0.52 0.05 0.26
## 2 0.02 0.79 0.24 0.02 0.25
## 3 0.01 0.86 0.41 0.29 0.30
## 4 0.01 0.97 0.96 0.60 0.47
## 5 0.01 0.89 0.87 0.04 0.55
## 6 0.02 0.84 0.30 0.16 0.28
## MedYrHousBuilt PctHousNoPhone PctWOFullPlumb OwnOccLowQuart OwnOccMedVal
## 1 0.65 0.14 0.06 0.22 0.19
## 2 0.65 0.16 0.00 0.21 0.20
## 3 0.52 0.47 0.45 0.18 0.17
## 4 0.52 0.11 0.11 0.24 0.21
## 5 0.73 0.05 0.14 0.31 0.31
## 6 0.25 0.02 0.05 0.94 1.00
## OwnOccHiQuart RentLowQ RentMedian RentHighQ MedRent MedRentPctHousInc
## 1 0.18 0.36 0.35 0.38 0.34 0.38
## 2 0.21 0.42 0.38 0.40 0.37 0.29
## 3 0.16 0.27 0.29 0.27 0.31 0.48
## 4 0.19 0.75 0.70 0.77 0.89 0.63
## 5 0.30 0.40 0.36 0.38 0.38 0.22
## 6 1.00 0.67 0.63 0.68 0.62 0.47
## MedOwnCostPctInc MedOwnCostPctIncNoMtg NumInShelters NumStreet PctForeignBorn
## 1 0.46 0.25 0.04 0 0.12
## 2 0.32 0.18 0.00 0 0.21
## 3 0.39 0.28 0.00 0 0.14
## 4 0.51 0.47 0.00 0 0.19
## 5 0.51 0.21 0.00 0 0.11
## 6 0.59 0.11 0.00 0 0.70
## PctBornSameState PctSameHouse85 PctSameCity85 PctSameState85 LemasSwornFT
## 1 0.42 0.50 0.51 0.64 0.03
## 2 0.50 0.34 0.60 0.52 NA
## 3 0.49 0.54 0.67 0.56 NA
## 4 0.30 0.73 0.64 0.65 NA
## 5 0.72 0.64 0.61 0.53 NA
## 6 0.42 0.49 0.73 0.64 NA
## LemasSwFTPerPop LemasSwFTFieldOps LemasSwFTFieldPerPop LemasTotalReq
## 1 0.13 0.96 0.17 0.06
## 2 NA NA NA NA
## 3 NA NA NA NA
## 4 NA NA NA NA
## 5 NA NA NA NA
## 6 NA NA NA NA
## LemasTotReqPerPop PolicRepPerOffic PolicPerPop RacialMatchCommPol
## 1 0.18 0.44 0.13 0.94
## 2 NA NA NA NA
## 3 NA NA NA NA
## 4 NA NA NA NA
## 5 NA NA NA NA
## 6 NA NA NA NA
## PctPolicWhite PctPolicBlack PctPolicHisp PctPolicAsian PctPolicMinor
## 1 0.93 0.03 0.07 0.1 0.07
## 2 NA NA NA NA NA
## 3 NA NA NA NA NA
## 4 NA NA NA NA NA
## 5 NA NA NA NA NA
## 6 NA NA NA NA NA
## OfficAssgnDrugUnits NumKindsDrugSeiz PolicAveOTWorked LandArea PopDens
## 1 0.02 0.57 0.29 0.12 0.26
## 2 NA NA NA 0.02 0.12
## 3 NA NA NA 0.01 0.21
## 4 NA NA NA 0.02 0.39
## 5 NA NA NA 0.04 0.09
## 6 NA NA NA 0.01 0.58
## PctUsePubTrans PolicCars PolicOperBudg LemasPctPolicOnPatr
## 1 0.20 0.06 0.04 0.9
## 2 0.45 NA NA NA
## 3 0.02 NA NA NA
## 4 0.28 NA NA NA
## 5 0.02 NA NA NA
## 6 0.10 NA NA NA
## LemasGangUnitDeploy LemasPctOfficDrugUn PolicBudgPerPop ViolentCrimesPerPop
## 1 0.5 0.32 0.14 0.20
## 2 NA 0.00 NA 0.67
## 3 NA 0.00 NA 0.43
## 4 NA 0.00 NA 0.12
## 5 NA 0.00 NA 0.03
## 6 NA 0.00 NA 0.14
Things to investigate (VCPP = ViolentCrimesPerPop):

* RacePerCap & VCPP
* Incomes/Poverty & VCPP
* Education & VCPP
* Employment & VCPP
* Rent & VCPP
* PolicPerPop & VCPP
Significant = the 95% CIs at VCPP = 0 and at VCPP = 1 do not overlap. Not Sig = the 95% CIs at VCPP = 0 and at VCPP = 1 overlap to some extent.
## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
MedIncome - Significant MedFamInc - Significant PerCapInc - Significant
PctPopUnderPov - Significant
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
PctLess9thGrade - Significant PctNotHSGrad - Significant PctBSorMore -
Significant
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
PctEmploy - Significant PctEmplManu - NOT Sig PctEmplProfServ -
Significant PctOccupMgmtProf - Significant
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
MedRentPctHousInc - Significant MedOwnCostPctInc - Significant
MedOwnCostPctIncNoMtg - Significant NumInShelters + NumStreet -
Significant. Note: I wish I had a variable like
(NumInShelters + NumStreet) / Population, but because all the values are
normalized, that’s not possible.
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1675 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1675 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1675 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 1675 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1675 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 1675 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1675 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 1675 rows containing missing values or values outside the scale range
## (`geom_point()`).
LemasSwFTPerPop - Significant LemasTotReqPerPop - Significant
RacialMatchCommPol - Significant PolicBudgPerPop - NOT Sig
# Derive the Region label from the State code and store it as a factor column
# of `communities`. The pipeline result is assigned back to the data frame
# (not to `communities$Region`, which would nest a whole data frame inside one
# column), and the labels are written to `Region` so `State` keeps its
# original codes. Codes that are in none of the lists become "Territory".
communities <- communities |>
  mutate(
    Region = as.factor(case_when(
      State %in% c(8, 22, 24, 32, 34, 35, 42, 44, 51) ~ "Northeast",
      State %in% c(16, 17, 18, 19, 25, 26, 28, 30, 37, 39, 46, 55) ~ "Midwest",
      State %in% c(1, 5, 9, 10, 11, 12, 20, 21, 23, 27, 36, 40, 45, 47, 48,
                   52, 54) ~ "South",
      State %in% c(2, 4, 6, 7, 14, 15, 29, 31, 33, 41, 50, 53, 56) ~ "West",
      TRUE ~ "Territory"
    ))
  )
# Map State codes to region labels.
#
# Args:
#   State: a vector of State codes (any length, including zero).
#
# Returns:
#   A character vector the same length as `State` holding "Northeast",
#   "Midwest", "South" or "West"; any code not listed below (including NA)
#   maps to "Territory".
#
# The lookup is vectorized, so the function works both on a single code and on
# a whole column. A scalar `if` chain would fail on input of length != 1.
#
# NOTE(review): the code lists are kept exactly as written. Codes 3, 13, 38,
# 43 and 49 appear in none of them and therefore yield "Territory" - confirm
# this against the State coding scheme used by the data.
us.region <- function(State) {
  region_codes <- list(
    Northeast = c(8, 22, 24, 32, 34, 35, 42, 44, 51),
    Midwest = c(16, 17, 18, 19, 25, 26, 28, 30, 37, 39, 46, 55),
    South = c(1, 5, 9, 10, 11, 12, 20, 21, 23, 27, 36, 40, 45, 47, 48, 52, 54),
    West = c(2, 4, 6, 7, 14, 15, 29, 31, 33, 41, 50, 53, 56)
  )

  # Start from the fallback label and overwrite it wherever a code matches.
  # The four lists are disjoint, so the order of the loop does not matter.
  region <- rep("Territory", length(State))
  for (region_name in names(region_codes)) {
    region[State %in% region_codes[[region_name]]] <- region_name
  }
  region
}
# Add a USRegion column by applying us.region() to each State code.
# vapply() with character(1) guarantees a character vector (one label per
# row), whereas sapply() can silently return a list, e.g. on empty input.
communities <- communities |>
  mutate(USRegion = vapply(State, us.region, character(1)))
# Histogram of the ViolentCrimesPerPop column, drawn with bins 0.05 wide.
communities |>
  ggplot(aes(x = ViolentCrimesPerPop)) +
  geom_histogram(binwidth = 0.05) +
  labs(
    title = "Violent Crime in the USA",
    subtitle = "1990 & 1995 Data",
    x = "Violent Crimes Per Population",
    y = "Frequency"
  )
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.